package com.echo.servlet;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.WildcardQuery;


/**
 * Builds a TF*IDF weight matrix for a set of restaurants.
 *
 * <p>Term frequencies are read from a frequency file whose lines are parsed as
 * {@code <text>--term:count,term:count,...}; a restaurant is matched to the
 * first line that contains its id as a substring. Frequencies missing from the
 * file and all inverse document frequencies are looked up in the Lucene index
 * at {@code INDEX_DIR}, field {@code reviewContent}.
 *
 * <p>Instances keep the result of the most recent call in {@link #termNum} and
 * {@link #termList}; the class does no synchronization of its own.
 */
public class EmergingPattern {
	
	/**
	 * Kept only so existing package-level references still compile. The
	 * frequency file is now read through a locally scoped reader that is
	 * closed after use, so this field is no longer assigned.
	 */
	BufferedReader reader = null;
	private static final String INDEX_DIR = "D:\\luceneIndex";
	/** Number of candidate terms found by the most recent {@link #getEmergingPattern} call. */
	int termNum = 0;
	/** Candidate terms of the most recent call, in the same order as the columns of the returned matrix. */
	ArrayList<String> termList = new ArrayList<String>();
	
	/**
	 * Computes the TF*IDF weight of every candidate term for every restaurant.
	 *
	 * <p>The candidate terms are the union of the terms listed in the frequency
	 * file for the given restaurants. For each restaurant/term pair the term
	 * frequency is taken from the file when listed there, otherwise from
	 * {@link #getLuceneTF}. A restaurant with no matching line in the file
	 * keeps a weight of 0 for every term.
	 *
	 * @param rest restaurant ids separated by single spaces
	 * @return the weights as text, each value followed by one space, laid out
	 *         restaurant by restaurant ({@code index = restaurantPosition * termNum + termPosition})
	 * @throws Exception if the frequency file cannot be located or read, or if
	 *         a matched term entry is malformed (missing or non-numeric count)
	 */
	public String getEmergingPattern(String rest)throws Exception{
		String frequencyFilePath = this.getClass().getResource("/").getPath()+"\\res\\topjterms.txt";
		String[] restList = rest.split(" ");
		
		// Scan the file once and remember each restaurant's raw term entries,
		// instead of re-opening the file for every restaurant/term pair.
		String[][] entriesByRest = findTermEntries(frequencyFilePath, restList);
		
		// Collect the distinct terms, restaurant by restaurant, so the HashMap
		// sees the same insertion sequence (and yields the same order) as before.
		HashMap<String, Integer> globalTerm = new HashMap<String, Integer>();
		for(int m=0; m<restList.length; m++){
			if(entriesByRest[m]==null){
				continue;
			}
			for(int i=0; i<entriesByRest[m].length; i++){
				String termName = (entriesByRest[m][i].split(":"))[0];
				if(!globalTerm.containsKey(termName)){
					globalTerm.put(termName, 0);
				}
			}
		}
		
		int termSize = globalTerm.size();
		String[] candidateTerm = globalTerm.keySet().toArray(new String[termSize]);
		termNum = termSize;
		// Drop terms left over from an earlier call so termList always mirrors this result.
		termList.clear();
		for(int j=0; j<termSize; j++){
			termList.add(candidateTerm[j]);
		}
		
		float[] candidateTermIDF = new float[termSize];
		for(int j=0; j<termSize; j++){
			candidateTermIDF[j] = getLuceneIDF(candidateTerm[j]);
		}
		
		// One flat array holds every weight: restaurant n, term x lives at n*termSize+x.
		float[] frequencyArray = new float[restList.length*termSize];
		for(int n=0; n<restList.length; n++){
			if(entriesByRest[n]==null){
				// restaurant does not appear in the frequency file: weights stay 0
				continue;
			}
			HashMap<String, String> entryByTerm = indexFirstEntries(entriesByRest[n]);
			for(int x=0; x<termSize; x++){
				String entry = entryByTerm.get(candidateTerm[x]);
				float tf;
				if(entry!=null){
					// the file records this term's TF for the restaurant: use it directly
					tf = Integer.parseInt((entry.split(":"))[1]);
				}else{
					// term is not frequent enough to be in the file: ask Lucene
					tf = getLuceneTF(restList[n], candidateTerm[x]);
				}
				frequencyArray[n*termSize+x] = tf*candidateTermIDF[x];
			}
		}
		
		StringBuilder weights = new StringBuilder();
		for(int x=0; x<frequencyArray.length; x++){
			weights.append(frequencyArray[x]).append(" ");
		}
		for(int y=0; y<candidateTerm.length; y++){
			System.out.print(candidateTerm[y]+" ");
		}
		System.out.println();
		return weights.toString();
	}
	
	/**
	 * Finds, in a single pass over the frequency file, the raw term entries of
	 * the first line containing each restaurant id. The reader is always closed.
	 *
	 * @return one slot per restaurant: {@code null} when no line matched,
	 *         otherwise the (possibly empty) array of {@code term:count} entries
	 */
	private String[][] findTermEntries(String frequencyFilePath, String[] restList) throws java.io.IOException{
		String[][] entriesByRest = new String[restList.length][];
		int unmatched = restList.length;
		if(unmatched==0){
			// nothing to look up, so do not touch the file at all
			return entriesByRest;
		}
		BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(frequencyFilePath)));
		try{
			String line;
			while(unmatched>0 && (line=in.readLine())!=null){
				String[] lineEntries = null; // split lazily, at most once per line
				for(int m=0; m<restList.length; m++){
					if(entriesByRest[m]==null && line.contains(restList[m])){
						if(lineEntries==null){
							lineEntries = splitTermEntries(line);
						}
						entriesByRest[m] = lineEntries;
						unmatched--;
					}
				}
			}
		}finally{
			in.close();
		}
		return entriesByRest;
	}
	
	/** Returns the comma-separated entries after the "--" separator, or an empty array when the line has none. */
	private static String[] splitTermEntries(String line){
		String[] sections = line.split("--");
		if(sections.length>1){
			return sections[1].split(",");
		}
		return new String[0];
	}
	
	/** Maps each term name to its first raw {@code term:count} entry; the count is parsed only when looked up. */
	private static HashMap<String, String> indexFirstEntries(String[] entries){
		HashMap<String, String> entryByTerm = new HashMap<String, String>();
		for(int i=0; i<entries.length; i++){
			String termName = (entries[i].split(":"))[0];
			if(!entryByTerm.containsKey(termName)){
				entryByTerm.put(termName, entries[i]);
			}
		}
		return entryByTerm;
	}
	
	/**
	 * Sums the frequency of a term over all indexed documents whose first
	 * stored field equals the given restaurant id.
	 *
	 * @param restId restaurant id compared against each document's first field
	 * @param queryTerm term looked up in the {@code reviewContent} field
	 * @return the summed term frequency; failures are printed and whatever was
	 *         accumulated so far (0 if nothing) is returned
	 */
	public int getLuceneTF(String restId, String queryTerm){
		int tf = 0;
		IndexReader indexReader = null;
		TermDocs termDocs = null;
		try{
			indexReader = IndexReader.open(INDEX_DIR);
			termDocs = indexReader.termDocs(new Term("reviewContent", queryTerm)); 
			while(termDocs.next()){
				Document doc = indexReader.document(termDocs.doc());
				List fields = doc.getFields();
				String tempRest = ((Field)fields.get(0)).stringValue();
				if(restId.equals(tempRest)){
					tf=tf+termDocs.freq();
				}
			}
		}catch(Exception e){
			e.printStackTrace();
		}finally{
			// This method runs once per restaurant/term pair, so release the index handles every time.
			try{
				if(termDocs!=null){
					termDocs.close();
				}
			}catch(Exception e){
				e.printStackTrace();
			}
			try{
				if(indexReader!=null){
					indexReader.close();
				}
			}catch(Exception e){
				e.printStackTrace();
			}
		}
		return tf;
	}
	
	/**
	 * Looks up the inverse document frequency of a term in the
	 * {@code reviewContent} field using Lucene's {@link DefaultSimilarity}.
	 *
	 * @param rest the term text (despite the parameter name)
	 * @return the IDF value, or 0 when the lookup fails (the failure is printed)
	 */
	public float getLuceneIDF(String rest){
		float idf = 0;
		Searcher searcher = null;
		try{
			searcher = new IndexSearcher(INDEX_DIR);
			Similarity sim = new DefaultSimilarity();
			idf = sim.idf(new Term("reviewContent", rest), searcher);
		}catch(Exception e){
			e.printStackTrace();
		}finally{
			try{
				if(searcher!=null){
					searcher.close();
				}
			}catch(Exception e){
				e.printStackTrace();
			}
		}
		
		return idf;
	}
}
